##### This script computes latent ideology, uses it to predict PID7, and then uses it for validity checks against congressional vote and the party feeling thermometers.

##################################################
# LOAD DATA AND PACKAGES
##################################################

library(readstata13)

library(questionr)

library(scales)

library(mice)

library(texreg)

raw <- read.dta13('../data/anes_timeseries_cdf_dta/anes_timeseries_cdf_stata13.dta', convert.factors=F)

d <- raw

##################################################
# SELECT VARIABLES AND RECODE
##################################################

### 2012 and 2016 subset

d126 <- d[d$VCF0004 %in% c(2012, 2016),]

### demographic variables (and validity checks: congressional vote, Rep Party thermometer, Dem Party thermometer)

dd <- subset(d126, select=c("VCF0004", "VCF0017", "VCF0102", "VCF0104", "VCF0105a", "VCF0110", "VCF0114", "VCF0301", "VCF0302", "VCF0803", "VCF9023", "VCF0705", "VCF0707", "VCF0224", "VCF0218", "VCF0604", "VCF9259", "VCF0009x", "VCF0009y")) # VCF9027 was original pres vote

names(dd) <- c("year", "mode", "agecat", "gender", "race7", "edu4", "faminc5", "pid7", "pid3", "ideo7", "nonvotePres", "votePres", "voteCong", "repTherm", "demTherm", "trustfed", "attention", "ftfweight", "webweight")

## check voteCong, gopTherm, and demTherm variables

table(dd$voteCong)

table(dd$votePres)
# dd$votePres[dd$votePres==3] <- NA # convert other vote to missing

table(dd$demTherm) # Note: 98 and 99 are DK and NA, respectively

table(dd$repTherm) # Note: ditto

## create internet variable

dd$internet <- ifelse(dd$mode==4, 1, 0)

## create demographic variables

dd$man <- ifelse(dd$gender==1, 1, 0)

dd$racenew <- dd$race7
dd$racenew[dd$race7==1] <- "white"
dd$racenew[dd$race7==2] <- "black"
dd$racenew[dd$race7==3] <- "aapi"
dd$racenew[dd$race7==4] <- "aborig"
dd$racenew[dd$race7==5] <- "hisp"
dd$racenew[dd$race7==6] <- "other"
dd$racenew <- as.factor(dd$racenew)
dd$racenew <- relevel(dd$racenew, ref="white")

### policy variables

dp <- subset(d126, select=c("VCF0809", "VCF0806", "VCF0830", "VCF0823", "VCF9230", "VCF9232", "VCF0878", "VCF0879", "VCF0838", "VCF9238", "VCF9236", "VCF9047", "VCF0890"))

names(dp) <- c("jobguar", "govinsur", "aidblack", "isolation", "outsource", "torture", "gayadopt", "immigvolume", "prochoice", "guncontrol", "deathpen", "envirospend", "schoolspend")

### missing values

lapply(dp, function(x) table(x, exclude=F))

### recodes

dp$gayadopt[dp$gayadopt==5] <- 2

dp[dp==8] <- NA
dp[dp==9] <- NA

### rescale

dp2 <- apply(dp, 2, function(x) rescale(x, to=c(0,1)))

dp <- dp2

##################################################
# SUBSET YEARS
##################################################

dp12 <- dp[dd$year==2012,]

dp16 <- dp[dd$year==2016,]

dd12 <- dd[dd$year==2012,]

dd16 <- dd[dd$year==2016,]

##################################################
# PRESIDENTIAL VOTE CROSS-TABULATION (TABLE 3)
##################################################

100*prop.table(wtd.table(dd12$pid7[dd12$internet==0], dd12$votePres[dd12$internet==0], weights=dd12$ftfweight[dd12$internet==0]), margin=1) # 2012 pres. vote by PID7, for LI sample

100*prop.table(wtd.table(dd12$pid7[dd12$internet==1], dd12$votePres[dd12$internet==1], weights=dd12$webweight[dd12$internet==1]), margin=1) # 2012 pres. vote by PID7, for SA sample

100*prop.table(wtd.table(dd16$pid7[dd16$internet==0], dd16$votePres[dd16$internet==0], weights=dd16$ftfweight[dd16$internet==0]), margin=1)

100*prop.table(wtd.table(dd16$pid7[dd16$internet==1], dd16$votePres[dd16$internet==1], weights=dd16$webweight[dd16$internet==1]), margin=1)

##################################################
# IMPUTATION AND FACTOR ANALYSIS (TABLE B1)
##################################################

### multiple imputation

set.seed(1931)

policyimp12 <- mice(as.matrix(dp12))

policy12 <- complete(policyimp12)

set.seed(1931)

policyimp16 <- mice(as.matrix(dp16))

policy16 <- complete(policyimp16)

### factor analysis

fa12 <- factanal(policy12, factors=2, scores="Bartlett")

print(fa12) # for Table B1

fa16 <- factanal(policy16, factors=2, scores="Bartlett")

print(fa16) # for Table B1

### extract factor scores

scores12 <- fa12$scores

scores16 <- fa16$scores

##################################################
# REGRESSIONS OF PARTY ID ON LATENT IDEOLOGY (TABLE 4)
##################################################

## combine demographic data with ideology measure

data12 <- cbind.data.frame(dd12, scores12)

data16 <- cbind.data.frame(dd16, scores16)

### 'pure' models

lm12.ftf <- lm(pid7 ~ -1 + Factor1 + Factor2, weights=ftfweight, data=data12[data12$internet==0,])

lm12.net <- lm(pid7 ~ -1 + Factor1 + Factor2, weights=webweight, data=data12[data12$internet==1,])

lm16.ftf <- lm(pid7 ~ -1 + Factor1 + Factor2, weights=ftfweight, data=data16[data16$internet==0,])

lm16.net <- lm(pid7 ~ -1 + Factor1 + Factor2, weights=webweight, data=data16[data16$internet==1,])

### Write regression results to disk

reg.out <- htmlreg(l=list(lm12.ftf, lm12.net, lm16.ftf, lm16.net), digits=3, stars=numeric(0), single.row=T, pvalues=1, custom.model.names=c('PID7 2012 (LI)', "PID7 2012 (SA)", "PID7 2016 (LI)", "PID7 2016 (SA)"), custom.coef.names=c("1st dimension", "2nd dimension"), caption="Linear regressions of PID7 on latent ideology.")

writeLines(reg.out, con='../output/table4_ols_predict-pid7-from-latent-ideo.html')

##################################################
# CORRELATIONS OF PARTY THERMOMETERS WITH PID BOTH WAYS (TABLE C1)
##################################################

### Split samples into live-interview (internet==0) and self-admin (internet==1)

li12 <- data12[data12$internet==0,]

sa12 <- data12[data12$internet==1,]

li16 <- data16[data16$internet==0,]

sa16 <- data16[data16$internet==1,]

### Dem thermometer

li12.dem <- cor(li12$demTherm, li12$pid7, use="complete.obs")

sa12.dem <- cor(sa12$demTherm, sa12$pid7, use="complete.obs")

li16.dem <- cor(li16$demTherm, li16$pid7, use="complete.obs")

sa16.dem <- cor(sa16$demTherm, sa16$pid7, use="complete.obs")

### Rep thermometer

li12.rep <- cor(li12$repTherm, li12$pid7, use="complete.obs")

sa12.rep <- cor(sa12$repTherm, sa12$pid7, use="complete.obs")

li16.rep <- cor(li16$repTherm, li16$pid7, use="complete.obs")

sa16.rep <- cor(sa16$repTherm, sa16$pid7, use="complete.obs")

### Make vectors, round to 3 dights

live.cor <- round(c(li12.dem, li16.dem, li12.rep, li16.rep), 3)

self.cor <- round(c(sa12.dem, sa16.dem, sa12.rep, sa16.rep), 3)

cor.name <- c("D therm. 2012", "D therm. 2016", "R therm. 2012", "R therm. 2016")

both.cor <- cbind(cor.name, live.cor, self.cor)

dimnames(both.cor)[[2]] <- c("Cor. with PID7", "LI", "SA")

### Table

library(xtable)

cor.table <- print(xtable(both.cor), type="html", include.rownames=F)

writeLines(cor.table, con="../output/table_c1_correlations_validity.html")